# ============================================================
# fsQCA: Medicaid Unwinding Procedural Disenrollment Study
# Four conditions: C1, C2 (composite), C3, C4
# March 2026
# ============================================================
# Two analysis variants:
#   Model A — N=50 (SD excluded; no call center)
#   Model B — N=49 (SD + DC excluded; DC structurally distinct)
# ============================================================


# install.packages(c("QCA", "SetMethods", "readxl", "writexl", "dplyr"))
install.packages("admisc")
library(QCA)
library(ggplot2)
library(ggrepel)
library(stargazer)
library(SetMethods)
library(readxl)
install.packages("whitexl")
library(writexl)
install.packages("rlang")
# Check what version actually installed
packageVersion("rlang")
install.packages("dplyr")
library(dplyr)
find.package("rlang")
packageVersion("rlang")
.libPaths()


# ============================================================
# 1. LOAD DATA
# ============================================================

df_raw <- read_excel("~/Desktop/data_mar2026_v3.xlsx", sheet = "Data")

# Model A: exclude SD only (N=50)
df_A <- df_raw %>% filter(id != "SD")

# Model B: exclude SD + DC (N=49)
df_B <- df_raw %>% filter(!id %in% c("SD", "DC"))

cat("Model A N:", nrow(df_A), "\n")
cat("Model B N:", nrow(df_B), "\n\n")


# ============================================================
# 2. CALIBRATION FUNCTION
# ============================================================
# Applied identically to both models

calibrate_all <- function(df) {

  # C1: Income-Based Automation
  # Crisp three-value set: 0=0.0, 1=0.499, 2=1.0
  df$fs_c1 <- ifelse(df$c1_incomest == 0, 0.0,
              ifelse(df$c1_incomest == 1, 0.499, 1.0))

  # C2: Documentation Burden (composite)
  # c2_sum = c2_doc1 (universal) + c2_doc2 (deservingness screening)
  # Range: 2–8 | Anchors: fully_out=3, crossover=4.5, fully_in=7
  # Crossover at 4.5 — no state lands here; avoids exact-0.5 problem
  df$fs_c2 <- calibrate(df$c2_sum, thresholds = c(3, 4.5, 7))

  # C3: Digital Access Barrier
  # % HH <$20k without broadband (ACS 2023 5-yr Table S2801)
  # Range: 0.223–0.372 | Anchors: 0.23, 0.28, 0.34
  df$fs_c3 <- calibrate(df$c3_nobb, thresholds = c(0.23, 0.281, 0.34))

  # C4: Interaction Burden
  # Avg call center abandonment rate, April 2023–June 2024
  # Range: 0.004–0.526 | Anchors: 0.05, 0.15, 0.35
  df$fs_c4 <- calibrate(df$c4_abandon, thresholds = c(0.05, 0.15, 0.35))
  
  # Outcome: Renewal Outcome Index (index2)
  # Min-max normalized within sample, then calibrated
  # Fully out: ≤ 0.20 | Crossover: 0.50 | Fully in: ≥ 0.80
  df$index2_mm <- (df$index2 - min(df$index2)) / (max(df$index2) - min(df$index2))
  df$fs_out    <- calibrate(df$index2_mm, thresholds = c(0.20, 0.50, 0.80))

  # Supplementary: C2a and C2b fuzzy scores (for case-level interpretation)
  df$fs_c2a <- calibrate(df$c2_doc1, thresholds = c(2, 3, 5))
  df$fs_c2b <- calibrate(df$c2_doc2, thresholds = c(0, 1, 3))
  return(df)
}

df_A <- calibrate_all(df_A)
df_B <- calibrate_all(df_B)




# ============================================================
# 3. CROSSOVER CONFLICT CHECK
# ============================================================

check_crossover <- function(df, label) {
  cat("\n---", label, "---\n")
  vars <- c("fs_c1","fs_c2","fs_c3","fs_c4","fs_out")
  names <- c("C1","C2","C3","C4","Outcome")
  any_flag <- FALSE
  for (i in seq_along(vars)) {
    flagged <- df$id[df[[vars[i]]] == 0.5]
    if (length(flagged) > 0) {
      cat("  ⚠ Crossover (0.5) in", names[i], ":", paste(flagged, collapse=", "), "\n")
      any_flag <- TRUE
    }
  }
  if (!any_flag) cat("  ✓ No crossover conflicts\n")
}

cat("=== Crossover Conflict Check ===")
check_crossover(df_A, "Model A (N=50)")
check_crossover(df_B, "Model B (N=49)")


# ============================================================
# 4. CALIBRATED SCORE PREVIEW
# ============================================================

print_scores <- function(df, label) {
  cat("\n=== Calibrated Scores:", label, "===\n")
  print(df %>% select(id, c1_incomest, fs_c1,
                      c2_sum, fs_c2, c3_nobb, fs_c3,
                      c4_abandon, fs_c4, index2, fs_out))
}

print_scores(df_A, "Model A")


# ============================================================
# 5. HELPER: RUN FULL ANALYSIS
# ============================================================

run_fsqca <- function(df, label, output_dir) {

  dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
  rownames(df) <- df$id

  cat("\n\n")
  cat("============================================================\n")
  cat(" ANALYSIS:", label, "| N =", nrow(df), "\n")
  cat("============================================================\n")

  # --- Truth Table: Positive Outcome ---
  TT_pos <- truthTable(
    data       = df,
    outcome    = "fs_out",
    conditions = c("fs_c1","fs_c2","fs_c3","fs_c4"),
    incl.cut   = 0.80,
    n.cut      = 1,
    show.cases = TRUE,
    complete   = FALSE
  )
  cat("\n--- Truth Table (Positive Outcome) ---\n")
  print(TT_pos)

  # --- Truth Table: Negative Outcome ---
  TT_neg <- truthTable(
    data       = df,
    outcome    = "fs_out",
    conditions = c("fs_c1","fs_c2","fs_c3","fs_c4"),
    incl.cut   = 0.80,
    n.cut      = 1,
    show.cases = TRUE,
    complete   = FALSE,
    neg.out    = TRUE
  )
  cat("\n--- Truth Table (Negative Outcome) ---\n")
  print(TT_neg)

  # --- Minimization: Positive ---
  cat("\n--- Minimization: Positive Outcome ---\n")
  sol_pos <- minimize(TT_pos, details = TRUE, include = "?")
  print(sol_pos)

  # --- Minimization: Negative ---
  cat("\n--- Minimization: Negative Outcome ---\n")
  sol_neg <- minimize(TT_neg, details = TRUE, include = "?")
  print(sol_neg)

  # --- Necessity Analysis ---
  cond_mat <- as.matrix(df %>% select(fs_c1, fs_c2, fs_c3, fs_c4))
  out_vec  <- df$fs_out

  nec <- QCAfit(x = cond_mat, y = out_vec, neg.out = FALSE, necessity = TRUE)
  cat("\n--- Necessity Analysis (Positive Outcome) ---\n")
  print(nec)

  nec_filtered <- nec[nec[,"Cons.Nec"] >= 0.90 & nec[,"RoN"] >= 0.50, , drop=FALSE]
  cat("\nNecessary conditions (Cons.Nec >= 0.90, RoN >= 0.50):\n")
  if (nrow(nec_filtered) == 0) cat("  None.\n") else print(nec_filtered)

  # --- Sufficiency Analysis ---
  suf <- QCAfit(x = cond_mat, y = out_vec, neg.out = FALSE, necessity = FALSE)
  cat("\n--- Sufficiency Analysis (Positive Outcome) ---\n")
  print(suf)

  # --- Export ---
  write_xlsx(df %>% select(id, states, starts_with("fs_"), c2_sum, c2_doc1, c2_doc2,
                           c4_wait, pc1, pc2),
             path = paste0(output_dir, "calibrated_data.xlsx"))
  write_xlsx(as.data.frame(TT_pos$tt),
             path = paste0(output_dir, "truth_table_positive.xlsx"))
  write_xlsx(as.data.frame(TT_neg$tt),
             path = paste0(output_dir, "truth_table_negative.xlsx"))
  write_xlsx(as.data.frame(nec),
             path = paste0(output_dir, "necessity.xlsx"))
  write_xlsx(as.data.frame(suf),
             path = paste0(output_dir, "sufficiency.xlsx"))

  cat("\nResults saved to:", output_dir, "\n")

  return(list(TT_pos=TT_pos, TT_neg=TT_neg,
              sol_pos=sol_pos, sol_neg=sol_neg,
              nec=nec, suf=suf))
}


# ============================================================
# 6. RUN BOTH MODELS
# ============================================================

results_A <- run_fsqca(
  df         = df_A,
  label      = "Model A — N=50 (SD excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelA/"
)

results_B <- run_fsqca(
  df         = df_B,
  label      = "Model B — N=49 (SD + DC excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelB/"
)


# ============================================================
# 7. COMPARE TRUTH TABLES ACROSS MODELS
# ============================================================

cat("\n\n=== Truth Table Comparison: Model A vs Model B ===\n")
cat("(Configurations that differ in outcome assignment)\n\n")

# 1. Rerun calibration on both datasets
df_A <- calibrate_all(df_A)
df_B <- calibrate_all(df_B)

# 2. Rerun both models
results_A <- run_fsqca(
  df         = df_A,
  label      = "Model A — N=50 (SD excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelA/"
)

results_B <- run_fsqca(
  df         = df_B,
  label      = "Model B — N=49 (SD + DC excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelB/"
)

# 3. Then run the comparison
tt_A <- as.data.frame(results_A$TT_pos$tt)
tt_B <- as.data.frame(results_B$TT_pos$tt)


tt_A <- as.data.frame(results_A$TT_pos$tt)
tt_B <- as.data.frame(results_B$TT_pos$tt)

# Merge on condition columns to find differences
compare <- merge(tt_A, tt_B,
                 by = c("fs_c1","fs_c2","fs_c3","fs_c4"),
                 suffixes = c("_A","_B"), all = TRUE)
diffs <- compare[!is.na(compare$OUT_A) & !is.na(compare$OUT_B) &
                   compare$OUT_A != compare$OUT_B, ]

if (nrow(diffs) == 0) {
  cat("No differences in outcome assignment between Model A and Model B.\n")
} else {
  cat("Configurations with different outcome assignments:\n")
  print(diffs)
}

results_A <- run_fsqca(
  df         = df_A,
  label      = "Model A — N=50 (SD excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelA/"
)

results_B <- run_fsqca(
  df         = df_B,
  label      = "Model B — N=49 (SD + DC excluded)",
  output_dir = "~/Desktop/fsQCA_Results/ModelB/"
)


# Identify which states are causing the 0.5 warning
df_B %>% select(id, fs_c1, fs_c2, fs_c3, fs_c4) %>%
  filter(fs_c1 == 0.5 | fs_c2 == 0.5 | fs_c3 == 0.5 | fs_c4 == 0.5)

# Also get the case number mapping so we can read the truth table
df_B %>% mutate(case_n = row_number()) %>% 
  select(case_n, id, states) %>% print(n=49)


# ============================================================
# CALIBRATION REFERENCE
# ============================================================
# C1  | c1_incomest   | crisp: 0→0.0, 1→0.499, 2→1.0
# C2  | c2_sum (2–8)  | calibrate(x, 3, 4.5, 7)
# C3  | c3_nobb       | calibrate(x, 0.23, 0.281, 0.34)
# C4  | c4_abandon    | calibrate(x, 0.05, 0.15, 0.35)
# OUT | index2        | calibrate(x, 0.33, 0.51, 0.76)
#
# Model A: N=50 (SD excluded — no call center, structural)
# Model B: N=49 (SD + DC excluded — DC city-administered, no state legislature)
#
# Supplementary columns (not in model):
#   fs_c2a — universal compliance burden component
#   fs_c2b — deservingness screening component
#   c4_wait, pc1, pc2 — call center diagnostic measures

df_B %>% filter(id %in% c("KS","TX")) %>% select(id, c3_nobb, fs_c3)
